{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# k-Nearest Neighbors for Multioutput Regression"
      ],
      "metadata": {
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import pandas as pd\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# fetch yahoo data\n",
        "import yfinance as yf\n",
        "yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-08-11T00:06:51.877Z",
          "iopub.execute_input": "2020-08-11T00:06:51.883Z",
          "iopub.status.idle": "2020-08-11T00:06:52.805Z",
          "shell.execute_reply": "2020-08-11T00:06:52.828Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# input\n",
        "symbol = 'AMD'\n",
        "start = '2014-01-01'\n",
        "end = '2018-08-27'\n",
        "\n",
        "# Read data \n",
        "dataset = yf.download(symbol,start,end)\n",
        "\n",
        "# Only keep close columns \n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 completed\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "            Adj Close  Close  High   Low  Open    Volume\nDate                                                    \n2014-01-02       3.95   3.95  3.98  3.84  3.85  20548400\n2014-01-03       4.00   4.00  4.00  3.88  3.98  22887200\n2014-01-06       4.13   4.13  4.18  3.99  4.01  42398300\n2014-01-07       4.18   4.18  4.25  4.11  4.19  42932100\n2014-01-08       4.18   4.18  4.26  4.14  4.23  30678700",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Adj Close</th>\n      <th>Close</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Open</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-02</th>\n      <td>3.95</td>\n      <td>3.95</td>\n      <td>3.98</td>\n      <td>3.84</td>\n      <td>3.85</td>\n      <td>20548400</td>\n    </tr>\n    <tr>\n      <th>2014-01-03</th>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>3.98</td>\n      <td>22887200</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.01</td>\n      <td>42398300</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.19</td>\n      <td>42932100</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.23</td>\n      <td>30678700</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-08-11T00:06:52.813Z",
          "iopub.execute_input": "2020-08-11T00:06:52.818Z",
          "iopub.status.idle": "2020-08-11T00:06:54.029Z",
          "shell.execute_reply": "2020-08-11T00:06:54.013Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
        "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
        "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
        "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
        "dataset = dataset.dropna()"
      ],
      "outputs": [],
      "execution_count": 3,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-08-11T00:06:54.037Z",
          "iopub.execute_input": "2020-08-11T00:06:54.043Z",
          "iopub.status.idle": "2020-08-11T00:06:54.058Z",
          "shell.execute_reply": "2020-08-11T00:06:54.181Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset.tail()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 4,
          "data": {
            "text/plain": "            Adj Close      Close   High        Low       Open     Volume  \\\nDate                                                                       \n2018-08-20  19.980000  19.980000  20.08  19.350000  19.790001   62983200   \n2018-08-21  20.400000  20.400000  20.42  19.860001  19.980000   55629000   \n2018-08-22  20.900000  20.900000  20.92  20.209999  20.280001   62002700   \n2018-08-23  22.290001  22.290001  22.32  21.139999  21.190001  113444100   \n2018-08-24  23.980000  23.980000  24.00  22.670000  22.910000  164328200   \n\n            Increase_Decrease  Buy_Sell_on_Open  Buy_Sell   Returns  \nDate                                                                 \n2018-08-20                  0                 1         1  0.010622  \n2018-08-21                  1                 1         1  0.021021  \n2018-08-22                  1                 1         1  0.024510  \n2018-08-23                  1                 1         1  0.066507  \n2018-08-24                  0                 0         0  0.075819  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Adj Close</th>\n      <th>Close</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Open</th>\n      <th>Volume</th>\n      <th>Increase_Decrease</th>\n      <th>Buy_Sell_on_Open</th>\n      <th>Buy_Sell</th>\n      <th>Returns</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2018-08-20</th>\n      <td>19.980000</td>\n      <td>19.980000</td>\n      <td>20.08</td>\n      <td>19.350000</td>\n      <td>19.790001</td>\n      <td>62983200</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.010622</td>\n    </tr>\n    <tr>\n      <th>2018-08-21</th>\n      <td>20.400000</td>\n      <td>20.400000</td>\n      <td>20.42</td>\n      <td>19.860001</td>\n      <td>19.980000</td>\n      <td>55629000</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.021021</td>\n    </tr>\n    <tr>\n      <th>2018-08-22</th>\n      <td>20.900000</td>\n      <td>20.900000</td>\n      <td>20.92</td>\n      <td>20.209999</td>\n      <td>20.280001</td>\n      <td>62002700</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.024510</td>\n    </tr>\n    <tr>\n      <th>2018-08-23</th>\n      <td>22.290001</td>\n      <td>22.290001</td>\n      <td>22.32</td>\n      <td>21.139999</td>\n      <td>21.190001</td>\n      <td>113444100</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.066507</td>\n    </tr>\n    <tr>\n      <th>2018-08-24</th>\n      <td>23.980000</td>\n      <td>23.980000</td>\n      <td>24.00</td>\n      <td>22.670000</td>\n      <td>22.910000</td>\n      <td>164328200</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0.075819</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 4,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-08-11T00:06:54.070Z",
          "iopub.execute_input": "2020-08-11T00:06:54.076Z",
          "iopub.status.idle": "2020-08-11T00:06:54.089Z",
          "shell.execute_reply": "2020-08-11T00:06:54.667Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X = dataset.drop(['Adj Close', 'Open'], axis=1)\n",
        "Y = dataset[['Adj Close', 'Open']]"
      ],
      "outputs": [],
      "execution_count": 5,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-08-11T00:06:54.097Z",
          "iopub.execute_input": "2020-08-11T00:06:54.103Z",
          "iopub.status.idle": "2020-08-11T00:06:54.113Z",
          "shell.execute_reply": "2020-08-11T00:06:54.671Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "print(X.shape, Y.shape)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(1170, 8) (1170, 2)\n"
          ]
        }
      ],
      "execution_count": 6,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-08-11T00:06:54.123Z",
          "iopub.execute_input": "2020-08-11T00:06:54.128Z",
          "iopub.status.idle": "2020-08-11T00:06:54.143Z",
          "shell.execute_reply": "2020-08-11T00:06:54.674Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.neighbors import KNeighborsRegressor\n",
        "\n",
        "model = KNeighborsRegressor()\n",
        "model.fit(X, Y)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 7,
          "data": {
            "text/plain": "KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',\n          metric_params=None, n_jobs=None, n_neighbors=5, p=2,\n          weights='uniform')"
          },
          "metadata": {}
        }
      ],
      "execution_count": 7,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-08-11T00:06:54.152Z",
          "iopub.execute_input": "2020-08-11T00:06:54.158Z",
          "iopub.status.idle": "2020-08-11T00:06:54.639Z",
          "shell.execute_reply": "2020-08-11T00:06:54.678Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "print(dataset.describe())"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "         Adj Close        Close         High          Low         Open  \\\n",
            "count  1170.000000  1170.000000  1170.000000  1170.000000  1170.000000   \n",
            "mean      7.002803     7.002803     7.146650     6.855419     7.001342   \n",
            "std       4.861608     4.861608     4.953517     4.754049     4.855620   \n",
            "min       1.620000     1.620000     1.690000     1.610000     1.620000   \n",
            "25%       2.702500     2.702500     2.780000     2.660000     2.710000   \n",
            "50%       4.275000     4.275000     4.350000     4.175000     4.250000   \n",
            "75%      11.550000    11.550000    11.775000    11.307500    11.572500   \n",
            "max      23.980000    23.980000    24.000000    22.670000    22.910000   \n",
            "\n",
            "             Volume  Increase_Decrease  Buy_Sell_on_Open     Buy_Sell  \\\n",
            "count  1.170000e+03        1170.000000       1170.000000  1170.000000   \n",
            "mean   3.763371e+07           0.454701          0.505128     0.499145   \n",
            "std    3.355409e+07           0.498157          0.500188     0.500213   \n",
            "min    0.000000e+00           0.000000          0.000000     0.000000   \n",
            "25%    1.308128e+07           0.000000          0.000000     0.000000   \n",
            "50%    2.900380e+07           0.000000          1.000000     0.000000   \n",
            "75%    5.059335e+07           1.000000          1.000000     1.000000   \n",
            "max    2.683365e+08           1.000000          1.000000     1.000000   \n",
            "\n",
            "           Returns  \n",
            "count  1170.000000  \n",
            "mean      0.002241  \n",
            "std       0.038113  \n",
            "min      -0.242291  \n",
            "25%      -0.015964  \n",
            "50%       0.000353  \n",
            "75%       0.018227  \n",
            "max       0.522901  \n"
          ]
        }
      ],
      "execution_count": 8,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-08-11T00:06:54.648Z",
          "iopub.execute_input": "2020-08-11T00:06:54.653Z",
          "iopub.status.idle": "2020-08-11T00:06:54.695Z",
          "shell.execute_reply": "2020-08-11T00:06:54.682Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# input the data_in based on statistics\n",
        "data_in = [[23.98, 22.91, 7.00, 7.00, 1.62, 1.62, 4.27, 4.25]]\n",
        "yhat = model.predict(data_in)"
      ],
      "outputs": [],
      "execution_count": 9,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-08-11T00:06:54.701Z",
          "iopub.execute_input": "2020-08-11T00:06:54.706Z",
          "iopub.status.idle": "2020-08-11T00:06:54.713Z",
          "shell.execute_reply": "2020-08-11T00:06:54.776Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# summarize prediction\n",
        "print(yhat[0])"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[2.34400001 2.352     ]\n"
          ]
        }
      ],
      "execution_count": 10,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-08-11T00:06:54.718Z",
          "iopub.execute_input": "2020-08-11T00:06:54.722Z",
          "iopub.status.idle": "2020-08-11T00:06:54.733Z",
          "shell.execute_reply": "2020-08-11T00:06:54.780Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "model.score(X, Y)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 11,
          "data": {
            "text/plain": "0.7053689393640217"
          },
          "metadata": {}
        }
      ],
      "execution_count": 11,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-08-11T00:06:54.739Z",
          "iopub.execute_input": "2020-08-11T00:06:54.742Z",
          "iopub.status.idle": "2020-08-11T00:06:54.751Z",
          "shell.execute_reply": "2020-08-11T00:06:54.790Z"
        }
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "file_extension": ".py",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "mimetype": "text/x-python",
      "version": "3.5.5",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      }
    },
    "kernelspec": {
      "argv": [
        "C:\\Users\\Tin Hang\\Anaconda3\\envs\\py35\\python.exe",
        "-m",
        "ipykernel_launcher",
        "-f",
        "{connection_file}"
      ],
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "nteract": {
      "version": "0.24.1"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}